This script describes an example analysis for cyTOF data in R.
The base of this script was developed beginning of 2016 and only slighlty changed since. Thus it is likely that there are better ways to dot his nowadays! Alternative resources for cyTOF analysis are:
if(!require('bbRtools')){
devtools::install_git('https://github.com/BodenmillerGroup/bbRtools')
}
## Loading required package: bbRtools
If anything is not installed, install it via install.packages. For Bioconductor packages, follow the bioconductor instructions.
Set your project specific folders and settings
# input files: paths to your input and output folder
# a folder containing FCS files to be analysised
fcs_folder = './exampledata/'
out_folder = '/home/vitoz/Data/rseminaroutput/'
# the random seed
rand_seed = 1234
# Should the script use only the cells subsampled at the tsne step?
subsampled_cells_only = F
## load tsne: should the script try to load an existing tsne?
load_tsne = F
load_phenograph = F
load_flowsom = F
# filename of the tsne saved by the script
# choose the filename of the tsne such that it reflects the tsne setting
fn_tsne = file.path(out_folder, '20170222_tsne_out.rdata')
fn_phenograph = file.path(out_folder, '20170222_pheno_out.rdata')
fn_flowsom = file.path(out_folder, '20170222_flowsom_out.rdata')
# For plotting only: removes outliers with values higher than xx% of all cells
censor_val = 0.999
# point size for plots
size = 0.1
# define excluded channels
crap_channels = c("Time" ,"Event_length" ,"X89Y_BC1" , "X96Ru_Vol1" , "X98Ru_Vol2","X99Ru_Vol3" , "X100Ru_Vol4" , "X101Ru_Vol5" , "X102Ru_Vol6" , "X103Rh_BC2" , "X104Ru_Vol7" , "X105Pd_BC3" , "X106Pd_BC4" , "X108Pd_BC5" , "X110Pd_BC6" , "X113In_BC7" , "X115In_BC8" , "X120Sn" , "X131Xe" , "X138Ba" , "X140Ce_Beads" , "X150Nd_CD68" , "X151Eu" , "X156Gd" ,"X157Gd" , "X173Yb_CD3" , "X176Yb_CD45" , "X190BCKG" , "X191Ir_DNA1" , "X193Ir_DNA2" , "X195Pt" , "X196Pt" , "X208Pb" , "X209Bi_BC9" ,"MCB102" , "MCB104" ,"MCB105" ,"MCB106", "MCB108" , "MCB110" , "MCB113" , "MCB115" ,"DNA191" , "DNA193" , "Live194" ,
"Live195" , "beadDist" ,"barcode" ,"Beads140"
)
# load data
fcs_files = list.files(fcs_folder,pattern = '*.fcs')
dat = bbRtools::loadConvertMultiFCS(fileDir = fcs_folder)
# give each cell an own ID
dat[, id := paste(1:.N, .BY), by=condition]
setkey(dat, id)
# 'melt' data: make a column 'channel' with all the channels and a column 'counts' with all the counts
dat = melt.data.table(dat, id.vars=c('condition','id'), variable.name='channel', value.name = 'counts' , variable.factor = FALSE)
Here we simply extract metadata from the filename - however it is entirely possible to create such a table in excel, save it as a csv and load it here.
Using ‘merge’ the metadata can allways be added whenever needed! This is extremely helpfull, e.g. when looking at clinical data.
dat_meta = data.table(condition=dat[, unique(condition)])
dat_meta[, ':='(
mdm = bbRtools::getInfoFromFileList(condition,strPos = 1),
plate = bbRtools::getInfoFromFileList(condition,strPos = 2),
well = bbRtools::getInfoFromFileList(condition,strPos = 3),
stimulation = bbRtools::getInfoFromFileList(condition,strPos = c(4,5))
)]
# remove the NA counts
dat = subset(dat, !is.na(counts))
### calculate transformed counts ####
dat[ , counts_transf := asinh(counts/5)]
dat[, counts_scaled := counts/quantile(counts, 0.99), by=.(channel)]
dat[counts_scaled > 1, counts_scaled := 1]
Otherwise add the channel to the ‘crap’ channels list
# make a list of good channels
good_channels = unique(dat$channel)[!unique(dat$channel) %in% crap_channels]
print(good_channels)
## [1] "CD209" "CD64" "CD38" "CD68" "CD36" "CD71" "CD155"
## [8] "CD206" "CD13" "CD204" "CD123" "CD11b" "CD40" "Slamf7"
## [15] "CD197" "CD273" "CD304" "CD166" "CD82" "CD274" "CD119"
## [22] "CXCR4" "CD87" "CD120b" "CD4" "CD32" "CD16" "CD14"
## [29] "CD163" "CD169" "CD86" "HLA.ABC" "CD81" "CD88" "HLA.DR"
## [36] "CD54"
Tsne has a problem with duplicated cells, so remove them
# remove duplicate in live sample before running t-SNE
dat[,sum_all := sum(counts[channel %in% good_channels]), by= id]
# remove all 0 cells
dat = subset(dat, sum_all != 0)
# equal to: dat = dat[sum_all !=0,]
If save tsne is activated,
###### Run Tsne #####
# make a list
setkey(dat, id)
# number of cells per condition
print(dat[ , .(nCells = length(unique(id))), by=condition])
## condition nCells
## 1: MDM4x_P10_B2_IL4_1_CD68 pos.fcs 8665
## 2: MDM4x_P10_B3_IC_1_CD68 pos.fcs 10016
## 3: MDM4x_P10_B4_IL10_1_CD68 pos.fcs 8574
## 4: MDM4x_P10_B5_GC_1_CD68 pos.fcs 2991
## 5: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3042
print(dat[ , .(nCells = length(unique(id)))])
## nCells
## 1: 33288
set.seed(rand_seed)
# choose the tsne channels
tsne_channels = good_channels
if (load_tsne & file.exists(fn_tsne)){
load(fn_tsne)
} else {
tsne = bbRtools::calcTSNE(dat, tsne_channels, value_var = 'counts_transf',
id_var = 'id', group_var='condition', scale = F, subsample_groups=10000, subsample_mode='unequal', multicore=20)
save(tsne, file=fn_tsne)
}
## [1] "Using no_dims = 2, perplexity = 30.000000, and theta = 0.500000; distance_precomputed = 0 "
## [2] "Computing input similarities..."
## [3] "Building tree..."
## [4] " - point 10000 of 33272"
## [5] " - point 20000 of 33272"
## [6] " - point 30000 of 33272"
## [7] "Done in 2.5877 seconds (sparsity = 0.004252)!"
## [8] "Learning embedding..."
## [9] "Iteration 50: error is 110.011779 (50 iterations in 5.294261 seconds)"
## [10] "Iteration 100: error is 110.011632 (50 iterations in 5.765812 seconds)"
## [11] "Iteration 150: error is 100.790237 (50 iterations in 5.847215 seconds)"
## [12] "Iteration 200: error is 96.544576 (50 iterations in 5.702226 seconds)"
## [13] "Iteration 250: error is 5.481384 (50 iterations in 5.856825 seconds)"
## [14] "Iteration 300: error is 4.538533 (50 iterations in 4.971295 seconds)"
## [15] "Iteration 350: error is 4.291247 (50 iterations in 4.715139 seconds)"
## [16] "Iteration 400: error is 4.140086 (50 iterations in 4.834828 seconds)"
## [17] "Iteration 450: error is 4.035494 (50 iterations in 5.052949 seconds)"
## [18] "Iteration 500: error is 3.958142 (50 iterations in 4.869244 seconds)"
## [19] "Iteration 550: error is 3.897061 (50 iterations in 4.770489 seconds)"
## [20] "Iteration 600: error is 3.847240 (50 iterations in 5.008296 seconds)"
## [21] "Iteration 650: error is 3.805561 (50 iterations in 4.808290 seconds)"
## [22] "Iteration 700: error is 3.770491 (50 iterations in 4.991799 seconds)"
## [23] "Iteration 750: error is 3.739777 (50 iterations in 5.024691 seconds)"
## [24] "Iteration 800: error is 3.712395 (50 iterations in 5.585260 seconds)"
## [25] "Iteration 850: error is 3.688375 (50 iterations in 5.490099 seconds)"
## [26] "Iteration 900: error is 3.666562 (50 iterations in 5.786320 seconds)"
## [27] "Iteration 950: error is 3.646962 (50 iterations in 5.606620 seconds)"
## [28] "Iteration 999: error is 3.629561 (50 iterations in 5.597064 seconds)"
## [29] "Fitting performed in 105.578723 seconds."
If selected remove all cells not present in the tsne
if (subsampled_cells_only){
########## subset the data to only include the subsampled cells
setkey(dat, id)
dat = dat[tsne$Y$id ,]
}
This defines a colormap with lots and lots of colors
qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
col_vector = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
Rseminar: example of ggplot2
marker = good_channels[1]
subset(dat, channel == marker)[tsne$Y]
## condition
## 1: MDM4x_P10_B2_IL4_1_CD68 pos.fcs
## 2: MDM4x_P10_B3_IC_1_CD68 pos.fcs
## 3: MDM4x_P10_B4_IL10_1_CD68 pos.fcs
## 4: MDM4x_P10_B5_GC_1_CD68 pos.fcs
## 5: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs
## ---
## 33268: MDM4x_P10_B3_IC_1_CD68 pos.fcs
## 33269: MDM4x_P10_B3_IC_1_CD68 pos.fcs
## 33270: MDM4x_P10_B3_IC_1_CD68 pos.fcs
## 33271: MDM4x_P10_B3_IC_1_CD68 pos.fcs
## 33272: MDM4x_P10_B3_IC_1_CD68 pos.fcs
## id channel counts
## 1: 1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs CD209 27.1289749
## 2: 1 MDM4x_P10_B3_IC_1_CD68 pos.fcs CD209 7.4247875
## 3: 1 MDM4x_P10_B4_IL10_1_CD68 pos.fcs CD209 6.8900189
## 4: 1 MDM4x_P10_B5_GC_1_CD68 pos.fcs CD209 2.2226202
## 5: 1 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs CD209 3.3131831
## ---
## 33268: 9995 MDM4x_P10_B3_IC_1_CD68 pos.fcs CD209 0.6206078
## 33269: 9996 MDM4x_P10_B3_IC_1_CD68 pos.fcs CD209 0.0000000
## 33270: 9997 MDM4x_P10_B3_IC_1_CD68 pos.fcs CD209 3.6047390
## 33271: 9998 MDM4x_P10_B3_IC_1_CD68 pos.fcs CD209 0.0000000
## 33272: 9999 MDM4x_P10_B3_IC_1_CD68 pos.fcs CD209 9.8142004
## counts_transf counts_scaled sum_all bh_V1 bh_V2
## 1: 2.3926975 0.205122345 2600.5371 1.345933 -24.432269
## 2: 1.1863901 0.056138864 1373.5257 25.981259 2.711590
## 3: 1.1251302 0.052095475 2113.7750 -4.959740 12.619178
## 4: 0.4310509 0.016805245 1737.7808 -18.168732 15.275452
## 5: 0.6217888 0.025050997 2706.4836 -20.130232 14.366106
## ---
## 33268: 0.1238050 0.004692419 574.2401 6.078643 -6.179152
## 33269: 0.0000000 0.000000000 2643.3191 22.915985 8.332606
## 33270: 0.6697432 0.027255453 1192.3619 6.286984 1.480164
## 33271: 0.0000000 0.000000000 1981.0100 28.179845 7.400114
## 33272: 1.4268925 0.074205229 2259.0834 22.919115 14.545141
p = ggplot(subset(dat, channel == marker)[tsne$Y] , aes(x=bh_V1, y=bh_V2, color=counts_transf))+
geom_density2d()+
geom_point(alpha=0.5, size=1, aes(shape=condition))+
scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts')+
ggtitle(marker)
p
Look at an example channel mapped on the tsne
#plot the t-SNE Map by marker
print(good_channels)
## [1] "CD209" "CD64" "CD38" "CD68" "CD36" "CD71" "CD155"
## [8] "CD206" "CD13" "CD204" "CD123" "CD11b" "CD40" "Slamf7"
## [15] "CD197" "CD273" "CD304" "CD166" "CD82" "CD274" "CD119"
## [22] "CXCR4" "CD87" "CD120b" "CD4" "CD32" "CD16" "CD14"
## [29] "CD163" "CD169" "CD86" "HLA.ABC" "CD81" "CD88" "HLA.DR"
## [36] "CD54"
# you can replace good_channels[1] with any marker name
marker = good_channels[1]
p =subset(dat, channel == marker)[tsne$Y] %>%
ggplot(aes(x=bh_V1, y=bh_V2, color=censor_dat(counts,censor_val)))+
geom_point(alpha=0.5, size=1)+
scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts')+
ggtitle(marker)
p
Rseminar: example of plutting multiple markers
plot_markers = c("CD38" , "CD68" , "CD36" , "CD71" )
for (marker in plot_markers){
p =subset(dat, channel == marker)[tsne$Y] %>%
ggplot(aes(x=bh_V1, y=bh_V2, color=censor_dat(counts,censor_val)))+
geom_point(alpha=0.5, size=1)+
scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts')+
ggtitle(marker)
print(p)
}
Rseminar: example of plutting multiple markers with faceting
plot_markers = c("CD38" , "CD68" , "CD36" , "CD71" )
p =subset(dat, channel %in% plot_markers)[tsne$Y] %>%
ggplot(aes(x=bh_V1, y=bh_V2, color=censor_dat(counts,censor_val)))+
facet_wrap(~channel)+
geom_point(alpha=0.5, size=1)+
scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts')+
ggtitle('Multiple markers')
print(p)
Make an overview plot of all the channels on the tsne
#plot a t-SNE Map for all the markers
dat[, c_counts := bbRtools::censor_dat(counts_transf,censor_val), by=channel]
dat[, c_counts_scaled := c_counts, by=channel]
dat[, c_counts_scaled := (c_counts_scaled/max(c_counts_scaled)), by=channel]
dat[c_counts_scaled < 0, c_counts_scaled := 0, by=channel]
p = subset(dat, channel %in% good_channels)[tsne$Y] %>%
ggplot(aes(x=bh_V1, y=bh_V2, color=c_counts_scaled))+
facet_wrap(~channel, scales = "free", ncol = 10)+
geom_point(alpha=0.5, size=0.5)+
scale_color_gradientn(colours=rev(brewer.pal(11, 'Spectral')), name='Counts')+
ggtitle('')+
theme(strip.background = element_blank(),
axis.line=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank())
p
if (load_phenograph & file.exists(fn_phenograph)){
load(fn_phenograph)
} else {
# with joining with the tsne data, only the cells used for tsne are used
cluster_pheno = bbRtools::do_phenograph(dat, good_channels, valuevar='counts_transf', idvar = 'id', k = 10, seed = rand_seed)
save(cluster_pheno, file=fn_phenograph)
}
## Warning in fun(libname, pkgname): couldn't connect to display ":0"
## Run Rphenograph starts:
## -Input data of 33288 rows and 36 columns
## -k is set to 10
## Finding nearest neighbors...DONE ~ 199.865 s
## Compute jaccard coefficient between nearest-neighbor sets...DONE ~ 1.048 s
## Build undirected graph from the weighted links...DONE ~ 0.891 s
## Run louvain clustering on the graph ...DONE ~ 1.154 s
## Run Rphenograph DONE, took a total of 202.958s.
## Return a community class
## -Modularity value: 0.8383463
## -Number of clusters: 19
# with joining with the tsne data, only the cells used for tsne are used
if (load_flowsom & file.exists(fn_flowsom)){
load(fn_flowsom)
} else {
# with joining with the tsne data, only the cells used for tsne are used
cluster_flowsom = bbRtools::do_flowsom(dat, good_channels, valuevar='counts_transf', idvar = 'id', k = 10, seed = rand_seed)
save(cluster_flowsom, file=fn_flowsom)
}
## Building SOM
## Mapping data to SOM
## Building MST
## end fraction
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
## clustered
Map phenograph on the tsne
p = subset(dat, !duplicated(id))[tsne$Y][cluster_pheno] %>%
ggplot(aes(x=bh_V1, y=bh_V2))+
geom_point(size=0.3, alpha=1, aes(color=cluster, shape=condition))+
#scale_colour_brewer(palette="Paired")+
scale_color_manual(values = col_vector[10:70])+
ggtitle('Conditions')+
#stat_density2d(alpha=0.5, group=1, color='black')+
guides(color=guide_legend(override.aes=list(size=5)))+
theme(strip.background = element_blank(),
panel.background=element_rect(fill='white', colour = 'black'),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
plot.background=element_blank(),
legend.key = element_blank())
p
## Warning: Removed 36 rows containing missing values (geom_point).
# Plot a heatmap of the phenograph clusters
Convert the data into a matrix to do a heatmap
summary_dat = dat[cluster_pheno][ channel %in% good_channels ,list(
median_val = median(counts),
mean_val= mean(counts),
cell_cluster=.N
), by=.(channel,cluster)]
hm_dat = dcast.data.table(data =summary_dat, formula = 'cluster ~ channel',
value.var = 'median_val')
# save column
trownames = hm_dat$cluster
# convert to a matrix
hm_dat = as.matrix(hm_dat[,-1,with=F])
row.names(hm_dat) = trownames
Plot the heatmap with z-scoring per marker
cols = rev(brewer.pal(11,'Spectral'))
cmap = colorRampPalette(cols)
tdist = as.dist(1-cor(t(hm_dat), method="spearman"))
#tdist = dist((hm_dat), method="euclidean")
hr <- hclust(tdist, method="ward.D")
co_r <- order.optimal(tdist, hr$merge)
hr$merge = co_r$merge
hr$order = co_r$order
order_heatmap_zscored = row.names(hm_dat)[hr$order]
tdist = as.dist(1-cor((hm_dat), method="spearman"))
#tdist = dist(t(hm_dat), method="euclidean")
hc <- hclust(tdist, method="ward.D")
co_c <- order.optimal(tdist, hc$merge)
hc$merge = co_c$merge
hc$order = co_c$order
#z-score
p_dat = scale(hm_dat)
# censor z-score at 2
p_dat[p_dat > 2] =2
p_dat[p_dat < -2] =-2
heatmap.2(p_dat,
scale ='none',
trace = "none",
col=cmap(75),
Rowv=as.dendrogram(hr),
Colv=as.dendrogram(hc),
density.info ='none',
#Colv =NA,
#keyorient=2,
#sepwidth = c(0,0),
cexRow=0.6,
cexCol=0.6,
margins=c(4,4),
xlab = 'Conditions',
ylab ='Cluster',
main = 'Cluster markers, z-scored'
)
Heatmap with 0-1 scaling on the transformed data
p_dat = asinh(hm_dat/5)
p_dat = t(t(p_dat) / (apply(p_dat, 2, function(x) quantile(x, 0.99))))
tdist = as.dist(1-cor(t(p_dat), method="spearman"))
hr <- hclust(tdist, method="ward.D")
co_r <- order.optimal(tdist, hr$merge)
hr$merge = co_r$merge
hr$order = co_r$order
order_heatmap_01 = row.names(hm_dat)[hr$order]
tdist = as.dist(1-cor((hm_dat), method="spearman"))
hc <- hclust(tdist, method="ward.D")
co_c <- order.optimal(tdist, hc$merge)
hc$merge = co_c$merge
hc$order = co_c$order
heatmap.2(p_dat,
scale ='none',
trace = "none",
col=cmap(75),
Rowv=as.dendrogram(hr),
#RowSideColors=sel_col_vector[group_levels],
#dendrogram='column',
Colv=as.dendrogram(hc),
density.info ='none',
#Colv =NA,
#keyorient=2,
xlab = 'Conditions',
ylab ='Cluster',
main = 'Cluster markers, 0-1 scaled',
#sepwidth = c(0,0),
cexRow=0.6,
cexCol=0.6,
margins=c(4,4)
#comments = data.frame(names = row.names(tab_Prot))
)
Prepare the data
##############################distribution of cluster per condtition in a table
dat_cluster_condition = dat[cluster_pheno][, .(cluster_mean = mean(counts_transf), ncells=.N),by=.(condition, channel, cluster)]
dat_cluster_condition[, id2:=paste(cluster, condition)]
setkey(dat_cluster_condition, 'id2')
cluster_dat = subset(dat_cluster_condition, !duplicated(id2))
cluster_dat[, frac_cluster := ncells/sum(ncells), by=condition]
cluster_dat[, frac_cells := ncells/sum(ncells), by=cluster]
# this will order the clusters the same as in the heatmap above
cluster_dat[, cluster_s := factor(as.character(cluster),levels = order_heatmap_zscored)]
Plot the fraction of cells per cluster as a heatmap
p <- ggplot(cluster_dat, aes(x=cluster_s, y=condition)) +
geom_tile(aes(fill = frac_cells), colour = "white") +
scale_fill_gradient(low = "white", high = "steelblue")+
theme(panel.background = element_blank())+
theme(axis.text.x = element_text(angle = 90, hjust = 1))
p
Plot the cluster composition
#distribution of cluster per condtition in a staked bar graph
p <- ggplot(cluster_dat, aes(x=cluster_s, y=frac_cells, fill=condition)) +
geom_bar(stat='identity')+
scale_fill_manual(values = col_vector)+
coord_flip()+
theme(panel.background = element_blank())+
ggtitle('cluster composition')
p
Plot the condition composition
p <- ggplot(cluster_dat, aes(x=condition, y=frac_cluster, fill=cluster)) +
geom_bar(stat='identity')+
scale_fill_manual(values = col_vector)+
coord_flip()+
theme(panel.background = element_blank())+
ggtitle('Condition composition')
p
Distribution of cell number per cluster
#distribution of cells per cluster
cluster_sum =cluster_dat[, .(cells_cluster = sum(ncells)), by=cluster_s]
p <- ggplot(cluster_sum, aes(x=cluster_s, y=cells_cluster)) +
geom_bar(stat='identity')+
coord_flip()+
theme(panel.background = element_blank())+
ggtitle('distribution of cell numbers')
p
Distribution of conditions per cluster
###
cluster_dat[, frac_cells_condition := ncells/sum(ncells), by=condition]
cluster_dat[, frac_cells_scaled := frac_cells_condition/sum(frac_cells_condition), by=cluster]
cluster_dat[, cluster_s := factor(as.character(cluster),levels = row.names(hm_dat)[hr$order])]
p <- merge(cluster_dat, dat_meta,by = 'condition') %>%
ggplot(aes(x=cluster_s, y=frac_cells_scaled, fill=stimulation)) +
geom_bar(stat='identity')+
scale_fill_manual(values = col_vector)+
coord_flip()+
theme(panel.background = element_blank())+
ggtitle('Distribution of conditions per cluster')
p
p <- merge(cluster_dat, dat_meta,by = 'condition') %>%
ggplot(aes(x=cluster_s, y=frac_cells_scaled, fill=well)) +
geom_bar(stat='identity')+
scale_fill_manual(values = col_vector)+
coord_flip()+
theme(panel.background = element_blank())+
ggtitle('Distribution of conditions per cluster')
p
merge(cluster_dat, dat_meta,by = 'condition')
## condition channel cluster cluster_mean
## 1: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 1 15.55951
## 2: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 10 15.27975
## 3: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 12 15.04920
## 4: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 13 14.93846
## 5: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 15 15.65354
## 6: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 16 15.41448
## 7: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 17 14.96891
## 8: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 2 14.77665
## 9: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 3 14.56104
## 10: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 4 14.87216
## 11: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 5 15.21564
## 12: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 6 14.96687
## 13: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 7 14.85837
## 14: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 8 15.31478
## 15: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 9 15.08621
## 16: MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time <NA> 14.91948
## 17: MDM4x_P10_B3_IC_1_CD68 pos.fcs Time 1 14.69155
## 18: MDM4x_P10_B3_IC_1_CD68 pos.fcs Time 11 15.05925
## 19: MDM4x_P10_B3_IC_1_CD68 pos.fcs Time 13 15.84435
## 20: MDM4x_P10_B3_IC_1_CD68 pos.fcs Time 14 14.69747
## 21: MDM4x_P10_B3_IC_1_CD68 pos.fcs Time 15 15.12603
## 22: MDM4x_P10_B3_IC_1_CD68 pos.fcs Time 16 15.15223
## 23: MDM4x_P10_B3_IC_1_CD68 pos.fcs Time 18 14.67412
## 24: MDM4x_P10_B3_IC_1_CD68 pos.fcs Time 19 14.88106
## 25: MDM4x_P10_B3_IC_1_CD68 pos.fcs Time 6 15.26818
## 26: MDM4x_P10_B3_IC_1_CD68 pos.fcs Time <NA> 15.26710
## 27: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 1 15.28458
## 28: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 10 15.24115
## 29: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 12 14.77313
## 30: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 13 15.11324
## 31: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 15 15.49566
## 32: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 16 15.41977
## 33: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 17 15.54537
## 34: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 2 14.59519
## 35: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 3 14.85595
## 36: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 5 15.47432
## 37: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 6 14.86569
## 38: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 7 14.21040
## 39: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 8 15.02489
## 40: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time 9 14.84203
## 41: MDM4x_P10_B4_IL10_1_CD68 pos.fcs Time <NA> 15.42618
## 42: MDM4x_P10_B5_GC_1_CD68 pos.fcs Time 1 15.21211
## 43: MDM4x_P10_B5_GC_1_CD68 pos.fcs Time 10 15.13102
## 44: MDM4x_P10_B5_GC_1_CD68 pos.fcs Time 12 14.50188
## 45: MDM4x_P10_B5_GC_1_CD68 pos.fcs Time 13 14.84974
## 46: MDM4x_P10_B5_GC_1_CD68 pos.fcs Time 16 15.12043
## 47: MDM4x_P10_B5_GC_1_CD68 pos.fcs Time 2 14.36221
## 48: MDM4x_P10_B5_GC_1_CD68 pos.fcs Time 3 14.69176
## 49: MDM4x_P10_B5_GC_1_CD68 pos.fcs Time 6 14.78551
## 50: MDM4x_P10_B5_GC_1_CD68 pos.fcs Time 8 15.18509
## 51: MDM4x_P10_B5_GC_1_CD68 pos.fcs Time 9 14.98002
## 52: MDM4x_P10_B5_GC_1_CD68 pos.fcs Time <NA> 15.31220
## 53: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Time 1 15.18601
## 54: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Time 10 15.00187
## 55: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Time 12 15.12195
## 56: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Time 13 14.96634
## 57: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Time 14 15.71903
## 58: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Time 16 14.51320
## 59: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Time 2 14.70602
## 60: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Time 3 14.66166
## 61: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Time 5 15.89804
## 62: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Time 6 14.91729
## 63: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Time 8 14.85298
## 64: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Time 9 14.78067
## condition channel cluster cluster_mean
## ncells id2 frac_cluster
## 1: 10 1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.154068e-03
## 2: 26 10 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 3.000577e-03
## 3: 14 12 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.615695e-03
## 4: 22 13 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 2.538950e-03
## 5: 1 15 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.154068e-04
## 6: 1 16 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.154068e-04
## 7: 130 17 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.500289e-02
## 8: 6188 2 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 7.141373e-01
## 9: 5 3 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 5.770340e-04
## 10: 69 4 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 7.963070e-03
## 11: 2040 5 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 2.354299e-01
## 12: 31 6 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 3.577611e-03
## 13: 99 7 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.142527e-02
## 14: 3 8 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 3.462204e-04
## 15: 19 9 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 2.192729e-03
## 16: 7 NA MDM4x_P10_B2_IL4_1_CD68 pos.fcs 8.078477e-04
## 17: 1 1 MDM4x_P10_B3_IC_1_CD68 pos.fcs 9.984026e-05
## 18: 120 11 MDM4x_P10_B3_IC_1_CD68 pos.fcs 1.198083e-02
## 19: 1 13 MDM4x_P10_B3_IC_1_CD68 pos.fcs 9.984026e-05
## 20: 4638 14 MDM4x_P10_B3_IC_1_CD68 pos.fcs 4.630591e-01
## 21: 2621 15 MDM4x_P10_B3_IC_1_CD68 pos.fcs 2.616813e-01
## 22: 28 16 MDM4x_P10_B3_IC_1_CD68 pos.fcs 2.795527e-03
## 23: 129 18 MDM4x_P10_B3_IC_1_CD68 pos.fcs 1.287939e-02
## 24: 2469 19 MDM4x_P10_B3_IC_1_CD68 pos.fcs 2.465056e-01
## 25: 3 6 MDM4x_P10_B3_IC_1_CD68 pos.fcs 2.995208e-04
## 26: 6 NA MDM4x_P10_B3_IC_1_CD68 pos.fcs 5.990415e-04
## 27: 131 1 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 1.527875e-02
## 28: 1809 10 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 2.109867e-01
## 29: 5300 12 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 6.181479e-01
## 30: 5 13 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 5.831584e-04
## 31: 1 15 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 1.166317e-04
## 32: 5 16 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 5.831584e-04
## 33: 1 17 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 1.166317e-04
## 34: 9 2 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 1.049685e-03
## 35: 142 3 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 1.656170e-02
## 36: 2 5 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 2.332634e-04
## 37: 626 6 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 7.301143e-02
## 38: 3 7 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 3.498950e-04
## 39: 93 8 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 1.084675e-02
## 40: 443 9 MDM4x_P10_B4_IL10_1_CD68 pos.fcs 5.166783e-02
## 41: 4 NA MDM4x_P10_B4_IL10_1_CD68 pos.fcs 4.665267e-04
## 42: 814 1 MDM4x_P10_B5_GC_1_CD68 pos.fcs 2.721498e-01
## 43: 149 10 MDM4x_P10_B5_GC_1_CD68 pos.fcs 4.981612e-02
## 44: 42 12 MDM4x_P10_B5_GC_1_CD68 pos.fcs 1.404213e-02
## 45: 27 13 MDM4x_P10_B5_GC_1_CD68 pos.fcs 9.027081e-03
## 46: 10 16 MDM4x_P10_B5_GC_1_CD68 pos.fcs 3.343363e-03
## 47: 1 2 MDM4x_P10_B5_GC_1_CD68 pos.fcs 3.343363e-04
## 48: 1678 3 MDM4x_P10_B5_GC_1_CD68 pos.fcs 5.610164e-01
## 49: 51 6 MDM4x_P10_B5_GC_1_CD68 pos.fcs 1.705115e-02
## 50: 33 8 MDM4x_P10_B5_GC_1_CD68 pos.fcs 1.103310e-02
## 51: 183 9 MDM4x_P10_B5_GC_1_CD68 pos.fcs 6.118355e-02
## 52: 3 NA MDM4x_P10_B5_GC_1_CD68 pos.fcs 1.003009e-03
## 53: 779 1 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 2.560815e-01
## 54: 73 10 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 2.399737e-02
## 55: 30 12 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 9.861933e-03
## 56: 77 13 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 2.531229e-02
## 57: 1 14 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.287311e-04
## 58: 9 16 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 2.958580e-03
## 59: 2 2 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 6.574622e-04
## 60: 1892 3 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 6.219592e-01
## 61: 1 5 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.287311e-04
## 62: 45 6 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 1.479290e-02
## 63: 27 8 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 8.875740e-03
## 64: 106 9 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.484550e-02
## ncells id2 frac_cluster
## frac_cells cluster_s frac_cells_condition frac_cells_scaled mdm
## 1: 0.0057636888 1 1.154068e-03 0.0021184736 MDM4x
## 2: 0.0126397667 10 3.000577e-03 0.0104258827 MDM4x
## 3: 0.0025993316 12 1.615695e-03 0.0025101391 MDM4x
## 4: 0.1666666667 13 2.538950e-03 0.0675947893 MDM4x
## 5: 0.0003812429 15 1.154068e-04 0.0004406297 MDM4x
## 6: 0.0188679245 16 1.154068e-04 0.0117809707 MDM4x
## 7: 0.9923664122 17 1.500289e-02 0.9922860182 MDM4x
## 8: 0.9980645161 2 7.141373e-01 0.9971494778 MDM4x
## 9: 0.0013451708 3 5.770340e-04 0.0004808159 MDM4x
## 10: 1.0000000000 4 7.963070e-03 1.0000000000 MDM4x
## 11: 0.9985315712 5 2.354299e-01 0.9976185857 MDM4x
## 12: 0.0410052910 6 3.577611e-03 0.0329028332 MDM4x
## 13: 0.9705882353 7 1.142527e-02 0.9702853498 MDM4x
## 14: 0.0192307692 8 3.462204e-04 0.0111318434 MDM4x
## 15: 0.0252996005 9 2.192729e-03 0.0146289618 MDM4x
## 16: 0.3500000000 <NA> 8.078477e-04 0.2808512935 MDM4x
## 17: 0.0005763689 1 9.984026e-05 0.0001832725 MDM4x
## 18: 1.0000000000 11 1.198083e-02 1.0000000000 MDM4x
## 19: 0.0075757576 13 9.984026e-05 0.0026580601 MDM4x
## 20: 0.9997844363 14 4.630591e-01 0.9992905919 MDM4x
## 21: 0.9992375143 15 2.616813e-01 0.9991140639 MDM4x
## 22: 0.5283018868 16 2.795527e-03 0.2853733137 MDM4x
## 23: 1.0000000000 18 1.287939e-02 1.0000000000 MDM4x
## 24: 1.0000000000 19 2.465056e-01 1.0000000000 MDM4x
## 25: 0.0039682540 6 2.995208e-04 0.0027546543 MDM4x
## 26: 0.3000000000 <NA> 5.990415e-04 0.2082590534 MDM4x
## 27: 0.0755043228 1 1.527875e-02 0.0280465498 MDM4x
## 28: 0.8794360719 10 2.109867e-01 0.7330998690 MDM4x
## 29: 0.9840326773 12 6.181479e-01 0.9603525905 MDM4x
## 30: 0.0378787879 13 5.831584e-04 0.0155255012 MDM4x
## 31: 0.0003812429 15 1.166317e-04 0.0004453063 MDM4x
## 32: 0.0943396226 16 5.831584e-04 0.0595300391 MDM4x
## 33: 0.0076335878 17 1.166317e-04 0.0077139818 MDM4x
## 34: 0.0014516129 2 1.049685e-03 0.0014656746 MDM4x
## 35: 0.0382028518 3 1.656170e-02 0.0138001001 MDM4x
## 36: 0.0009789525 5 2.332634e-04 0.0009884380 MDM4x
## 37: 0.8280423280 6 7.301143e-02 0.6714768164 MDM4x
## 38: 0.0294117647 7 3.498950e-04 0.0297146502 MDM4x
## 39: 0.5961538462 8 1.084675e-02 0.3487497223 MDM4x
## 40: 0.5898801598 9 5.166783e-02 0.3447059019 MDM4x
## 41: 0.2000000000 <NA> 4.665267e-04 0.1621897736 MDM4x
## 42: 0.4691642651 1 2.721498e-01 0.4995737597 MDM4x
## 43: 0.0724355858 10 4.981612e-02 0.1730923641 MDM4x
## 44: 0.0077979948 12 1.404213e-02 0.0218158028 MDM4x
## 45: 0.2045454545 13 9.027081e-03 0.2403291532 MDM4x
## 46: 0.1886792453 16 3.343363e-03 0.3412975964 MDM4x
## 47: 0.0001612903 2 3.343363e-04 0.0004668336 MDM4x
## 48: 0.4514393328 3 5.610164e-01 0.4674691054 MDM4x
## 49: 0.0674603175 6 1.705115e-02 0.1568172854 MDM4x
## 50: 0.2115384615 8 1.103310e-02 0.3547414427 MDM4x
## 51: 0.2436750999 9 6.118355e-02 0.4081907405 MDM4x
## 52: 0.1500000000 <NA> 1.003009e-03 0.3486998794 MDM4x
## 53: 0.4489913545 1 2.560815e-01 0.4700779443 MDM4x
## 54: 0.0354885756 10 2.399737e-02 0.0833818842 MDM4x
## 55: 0.0055699963 12 9.861933e-03 0.0153214676 MDM4x
## 56: 0.5833333333 13 2.531229e-02 0.6738924962 MDM4x
## 57: 0.0002155637 14 3.287311e-04 0.0007094081 MDM4x
## 58: 0.1698113208 16 2.958580e-03 0.3020180801 MDM4x
## 59: 0.0003225806 2 6.574622e-04 0.0009180140 MDM4x
## 60: 0.5090126446 3 6.219592e-01 0.5182499787 MDM4x
## 61: 0.0004894763 5 3.287311e-04 0.0013929763 MDM4x
## 62: 0.0595238095 6 1.479290e-02 0.1360484107 MDM4x
## 63: 0.1730769231 8 8.875740e-03 0.2853769917 MDM4x
## 64: 0.1411451398 9 3.484550e-02 0.2324743958 MDM4x
## frac_cells cluster_s frac_cells_condition frac_cells_scaled mdm
## plate well stimulation
## 1: P10 B2 IL4_1
## 2: P10 B2 IL4_1
## 3: P10 B2 IL4_1
## 4: P10 B2 IL4_1
## 5: P10 B2 IL4_1
## 6: P10 B2 IL4_1
## 7: P10 B2 IL4_1
## 8: P10 B2 IL4_1
## 9: P10 B2 IL4_1
## 10: P10 B2 IL4_1
## 11: P10 B2 IL4_1
## 12: P10 B2 IL4_1
## 13: P10 B2 IL4_1
## 14: P10 B2 IL4_1
## 15: P10 B2 IL4_1
## 16: P10 B2 IL4_1
## 17: P10 B3 IC_1
## 18: P10 B3 IC_1
## 19: P10 B3 IC_1
## 20: P10 B3 IC_1
## 21: P10 B3 IC_1
## 22: P10 B3 IC_1
## 23: P10 B3 IC_1
## 24: P10 B3 IC_1
## 25: P10 B3 IC_1
## 26: P10 B3 IC_1
## 27: P10 B4 IL10_1
## 28: P10 B4 IL10_1
## 29: P10 B4 IL10_1
## 30: P10 B4 IL10_1
## 31: P10 B4 IL10_1
## 32: P10 B4 IL10_1
## 33: P10 B4 IL10_1
## 34: P10 B4 IL10_1
## 35: P10 B4 IL10_1
## 36: P10 B4 IL10_1
## 37: P10 B4 IL10_1
## 38: P10 B4 IL10_1
## 39: P10 B4 IL10_1
## 40: P10 B4 IL10_1
## 41: P10 B4 IL10_1
## 42: P10 B5 GC_1
## 43: P10 B5 GC_1
## 44: P10 B5 GC_1
## 45: P10 B5 GC_1
## 46: P10 B5 GC_1
## 47: P10 B5 GC_1
## 48: P10 B5 GC_1
## 49: P10 B5 GC_1
## 50: P10 B5 GC_1
## 51: P10 B5 GC_1
## 52: P10 B5 GC_1
## 53: P10 B6 GC_TGFB
## 54: P10 B6 GC_TGFB
## 55: P10 B6 GC_TGFB
## 56: P10 B6 GC_TGFB
## 57: P10 B6 GC_TGFB
## 58: P10 B6 GC_TGFB
## 59: P10 B6 GC_TGFB
## 60: P10 B6 GC_TGFB
## 61: P10 B6 GC_TGFB
## 62: P10 B6 GC_TGFB
## 63: P10 B6 GC_TGFB
## 64: P10 B6 GC_TGFB
## plate well stimulation
(These annotations actually do not fit this example dataset - it is just an illustration how it can be done)
fn_clusterannot = './exampledata/example_clusteranotations.csv'
dat_clusterannot = fread(fn_clusterannot)
merge(dat[cluster_pheno], dat_clusterannot, by='cluster')
## cluster condition
## 1: 1 MDM4x_P10_B5_GC_1_CD68 pos.fcs
## 2: 1 MDM4x_P10_B5_GC_1_CD68 pos.fcs
## 3: 1 MDM4x_P10_B5_GC_1_CD68 pos.fcs
## 4: 1 MDM4x_P10_B5_GC_1_CD68 pos.fcs
## 5: 1 MDM4x_P10_B5_GC_1_CD68 pos.fcs
## ---
## 823033: 9 MDM4x_P10_B4_IL10_1_CD68 pos.fcs
## 823034: 9 MDM4x_P10_B4_IL10_1_CD68 pos.fcs
## 823035: 9 MDM4x_P10_B4_IL10_1_CD68 pos.fcs
## 823036: 9 MDM4x_P10_B4_IL10_1_CD68 pos.fcs
## 823037: 9 MDM4x_P10_B4_IL10_1_CD68 pos.fcs
## id channel counts
## 1: 1000 MDM4x_P10_B5_GC_1_CD68 pos.fcs Time 6.687669e+06
## 2: 1000 MDM4x_P10_B5_GC_1_CD68 pos.fcs Event_length 3.700000e+01
## 3: 1000 MDM4x_P10_B5_GC_1_CD68 pos.fcs MCB102 2.864088e+00
## 4: 1000 MDM4x_P10_B5_GC_1_CD68 pos.fcs MCB104 2.371512e+01
## 5: 1000 MDM4x_P10_B5_GC_1_CD68 pos.fcs MCB105 4.048273e+03
## ---
## 823033: 990 MDM4x_P10_B4_IL10_1_CD68 pos.fcs DNA193 3.874563e+03
## 823034: 990 MDM4x_P10_B4_IL10_1_CD68 pos.fcs Live194 2.893113e+02
## 823035: 990 MDM4x_P10_B4_IL10_1_CD68 pos.fcs Live195 2.729424e+02
## 823036: 990 MDM4x_P10_B4_IL10_1_CD68 pos.fcs beadDist 5.990276e+01
## 823037: 990 MDM4x_P10_B4_IL10_1_CD68 pos.fcs barcode 3.000000e+00
## counts_transf counts_scaled sum_all c_counts c_counts_scaled
## 1: 14.7994852 0.33216193 1038.041 14.7994852 0.9302322
## 2: 2.6991616 0.51388889 1038.041 2.6991616 0.7933328
## 3: 0.5453769 0.16090627 1038.041 0.5453769 0.2084160
## 4: 2.2607542 0.01672941 1038.041 2.2607542 0.3440973
## 5: 7.3897554 1.00000000 1038.041 7.3897554 0.9744026
## ---
## 823033: 7.3458978 0.82966404 1414.569 7.3458978 0.9622110
## 823034: 4.7512872 0.53272741 1414.569 4.7512872 0.8507562
## 823035: 4.6930540 0.56243807 1414.569 4.6930540 0.8557932
## 823036: 3.1781690 0.59502903 1414.569 3.1781690 0.8446354
## 823037: 0.5688249 0.60000000 1414.569 0.5688249 0.6453846
## celltype compartment
## 1: cd12321+ interesting
## 2: cd12321+ interesting
## 3: cd12321+ interesting
## 4: cd12321+ interesting
## 5: cd12321+ interesting
## ---
## 823033: unknown uninteresting
## 823034: unknown uninteresting
## 823035: unknown uninteresting
## 823036: unknown uninteresting
## 823037: unknown uninteresting
Whenever needed the cluster annotations can now be added to the cluster data
p <- merge(cluster_dat, dat_clusterannot, by='cluster') %>%
ggplot(aes(x=condition, y=frac_cluster, fill=celltype, color=cluster)) +
geom_bar(stat='identity')+
scale_fill_manual(values = col_vector)+
coord_flip()+
theme(panel.background = element_blank())+
ggtitle('Condition composition')
p
Condition metadata and cluster metadata can be even combined simultaneoulsy
p <- merge(cluster_dat, dat_clusterannot, by='cluster') %>%
merge(dat_meta, by='condition') %>%
ggplot(aes(x=stimulation, y=frac_cluster, fill=celltype)) +
geom_bar(stat='identity')+
scale_fill_manual(values = col_vector)+
coord_flip()+
theme(panel.background = element_blank())+
ggtitle('Condition composition')
print(p)
ggsave(file.path(out_folder, 'condtion_composition.pdf'),plot=p, width = 5, height = 5)
Rseminar: Colormap example
p <- merge(cluster_dat, dat_clusterannot, by='cluster') %>%
merge(dat_meta, by='condition') %>%
ggplot(aes(x=stimulation, y=frac_cluster, fill=celltype)) +
geom_bar(stat='identity')+
scale_fill_brewer(type = 'qual',palette = 'set1')+
coord_flip()+
theme(panel.background = element_blank())+
ggtitle('Condition composition')
## Warning in pal_name(palette, type): Unknown palette set1
print(p)
Example how to add the metadata:
merge(dat, dat_meta,by = 'condition')
## condition
## 1: MDM4x_P10_B2_IL4_1_CD68 pos.fcs
## 2: MDM4x_P10_B2_IL4_1_CD68 pos.fcs
## 3: MDM4x_P10_B2_IL4_1_CD68 pos.fcs
## 4: MDM4x_P10_B2_IL4_1_CD68 pos.fcs
## 5: MDM4x_P10_B2_IL4_1_CD68 pos.fcs
## ---
## 1764260: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs
## 1764261: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs
## 1764262: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs
## 1764263: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs
## 1764264: MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs
## id channel counts
## 1: 1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs Time 2187.266113
## 2: 1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs Event_length 51.000000
## 3: 1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs MCB102 1.252187
## 4: 1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs MCB104 850.113953
## 5: 1 MDM4x_P10_B2_IL4_1_CD68 pos.fcs MCB105 1341.456665
## ---
## 1764260: 999 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs DNA193 3209.162109
## 1764261: 999 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Live194 221.384903
## 1764262: 999 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs Live195 150.259659
## 1764263: 999 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs beadDist 77.466232
## 1764264: 999 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs barcode 5.000000
## counts_transf counts_scaled sum_all c_counts c_counts_scaled
## 1: 6.7741183 0.0001086367 2600.537 6.7741183 0.42579203
## 2: 3.0179292 0.7083333333 2600.537 3.0179292 0.88702446
## 3: 0.2478907 0.0703486341 2600.537 0.2478907 0.09473156
## 4: 5.8290883 0.5996978752 2600.537 5.8290883 0.88721440
## 5: 6.2852241 0.3498670418 2600.537 6.2852241 0.82876068
## ---
## 1764260: 7.1574750 0.6871810869 1425.255 7.1574750 0.93753026
## 1764261: 4.4837396 0.4076501769 1425.255 4.4837396 0.80284961
## 1764262: 4.0963508 0.3096321747 1425.255 4.0963508 0.74698249
## 1764263: 3.4345913 0.7694914306 1425.255 3.4345913 0.91278260
## 1764264: 0.8813736 1.0000000000 1425.255 0.8813736 1.00000000
## mdm plate well stimulation
## 1: MDM4x P10 B2 IL4_1
## 2: MDM4x P10 B2 IL4_1
## 3: MDM4x P10 B2 IL4_1
## 4: MDM4x P10 B2 IL4_1
## 5: MDM4x P10 B2 IL4_1
## ---
## 1764260: MDM4x P10 B6 GC_TGFB
## 1764261: MDM4x P10 B6 GC_TGFB
## 1764262: MDM4x P10 B6 GC_TGFB
## 1764263: MDM4x P10 B6 GC_TGFB
## 1764264: MDM4x P10 B6 GC_TGFB
How to summarize with data.table
dat[channel=='barcode', mean(counts)]
## [1] 2.451124
dat[, mean(counts), by=list(channel, condition)]
## channel condition V1
## 1: Time MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.030727e+07
## 2: Event_length MDM4x_P10_B2_IL4_1_CD68 pos.fcs 4.259469e+01
## 3: MCB102 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 3.579586e+00
## 4: MCB104 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 8.768262e+02
## 5: MCB105 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.432730e+03
## ---
## 261: DNA193 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.257350e+03
## 262: Live194 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.355503e+02
## 263: Live195 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 2.705967e+02
## 264: beadDist MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 6.862977e+01
## 265: barcode MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 5.000000e+00
dat[, .(meancounts=mean(counts),
n=.N),
, by=list(channel, condition)]
## channel condition meancounts n
## 1: Time MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.030727e+07 8665
## 2: Event_length MDM4x_P10_B2_IL4_1_CD68 pos.fcs 4.259469e+01 8665
## 3: MCB102 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 3.579586e+00 8665
## 4: MCB104 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 8.768262e+02 8665
## 5: MCB105 MDM4x_P10_B2_IL4_1_CD68 pos.fcs 1.432730e+03 8665
## ---
## 261: DNA193 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.257350e+03 3042
## 262: Live194 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 3.355503e+02 3042
## 263: Live195 MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 2.705967e+02 3042
## 264: beadDist MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 6.862977e+01 3042
## 265: barcode MDM4x_P10_B6_GC_TGFB_1_CD68 pos.fcs 5.000000e+00 3042
dat[, counts_mean_normalized := counts/mean(counts), by=list(channel, condition)]
dat[, counts_mean_normalized := counts/mean(counts), by=list(channel, condition)]
Example of renaming and deleting
dat[channel == "HLA.ABC", channel := 'HLA-ABC']
# remove a column
# add it as an example
dat[, channel3 := channel]
# remove it
dat[, channel3 := NULL]
#dat = dat[!channel %in% c('Time', 'barcode'),]
sessionInfo()
## R version 3.4.4 (2018-03-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/lib/libblas/libblas.so.3.6.0
## LAPACK: /usr/lib/lapack/liblapack.so.3.6.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggplot2_3.2.1 cba_0.2-20 proxy_0.4-22
## [4] gplots_3.0.1 dtplyr_0.0.3 dplyr_0.8.3
## [7] RColorBrewer_1.1-2 Rtsne_0.15 data.table_1.12.2
## [10] bbRtools_0.6.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.1.0 backports_1.1.4
## [3] Hmisc_4.1-1 VGAM_1.0-6
## [5] RcppEigen_0.3.3.4.0 plyr_1.8.4
## [7] igraph_1.2.4.1 ConsensusClusterPlus_1.42.0
## [9] lazyeval_0.2.2 sp_1.3-1
## [11] splines_3.4.4 flowCore_1.44.2
## [13] digest_0.6.20 foreach_1.4.4
## [15] htmltools_0.3.6 gdata_2.18.0
## [17] magrittr_1.5 checkmate_1.8.5
## [19] cluster_2.0.7-1 doParallel_1.0.11
## [21] openxlsx_4.1.0 shinyFiles_0.7.0
## [23] Rphenograph_0.99.1.9001 largeVis_0.2.2
## [25] matrixStats_0.54.0 xts_0.11-0
## [27] pdist_1.2 colorspace_1.3-2
## [29] rrcov_1.4-4 ggrepel_0.8.0
## [31] haven_1.1.2 tcltk_3.4.4
## [33] crayon_1.3.4 jsonlite_1.5
## [35] graph_1.56.0 survival_2.42-6
## [37] zoo_1.8-3 iterators_1.0.10
## [39] glue_1.3.1 gtable_0.3.0
## [41] car_3.0-2 BiocGenerics_0.24.0
## [43] DEoptimR_1.0-8 abind_1.4-5
## [45] VIM_4.7.0 scales_1.0.0
## [47] mvtnorm_1.0-8 cytofkit_1.10.0
## [49] miniUI_0.1.1.1 Rcpp_1.0.2
## [51] xtable_1.8-3 laeken_0.4.6
## [53] htmlTable_1.12 foreign_0.8-71
## [55] FlowSOM_1.10.0 Formula_1.2-3
## [57] stats4_3.4.4 tsne_0.1-3
## [59] vcd_1.4-4 htmlwidgets_1.2
## [61] acepack_1.4.1 pkgconfig_2.0.2
## [63] XML_3.98-1.16 nnet_7.3-12
## [65] tidyselect_0.2.5 labeling_0.3
## [67] rlang_0.4.0 reshape2_1.4.3
## [69] later_0.7.3 munsell_0.5.0
## [71] cellranger_1.1.0 tools_3.4.4
## [73] evaluate_0.14 stringr_1.3.1
## [75] yaml_2.2.0 knitr_1.20
## [77] zip_1.0.0 robustbase_0.93-2
## [79] caTools_1.17.1.1 purrr_0.3.2
## [81] RANN_2.6.1 nlme_3.1-137
## [83] mime_0.5 compiler_3.4.4
## [85] rstudioapi_0.7 curl_3.2
## [87] e1071_1.7-0 smoother_1.1
## [89] tibble_2.1.3 pcaPP_1.9-73
## [91] stringi_1.2.4 forcats_0.3.0
## [93] lattice_0.20-35 Matrix_1.2-14
## [95] vegan_2.5-2 permute_0.9-4
## [97] pillar_1.4.2 lmtest_0.9-36
## [99] bitops_1.0-6 corpcor_1.6.9
## [101] httpuv_1.4.5 R6_2.4.0
## [103] latticeExtra_0.6-28 promises_1.0.1
## [105] KernSmooth_2.23-15 gridExtra_2.3
## [107] rio_0.5.10 Rtsne.multicore_0.0.99
## [109] codetools_0.2-15 boot_1.3-20
## [111] colourpicker_1.0 MASS_7.3-50
## [113] gtools_3.8.1 assertthat_0.2.1
## [115] destiny_2.6.2 rprojroot_1.3-2
## [117] withr_2.1.2 mgcv_1.8-24
## [119] parallel_3.4.4 hms_0.4.2
## [121] rpart_4.1-13 class_7.3-14
## [123] rmarkdown_1.10 carData_3.0-1
## [125] TTR_0.23-3 Biobase_2.38.0
## [127] shiny_1.1.0 base64enc_0.1-3